# study area
# studyarea <- st_read("data/StudyArea.shp") %>%
studyarea <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp") %>%
st_transform('EPSG:2272')
## Reading layer `StudyArea' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -8367662 ymin: 4859107 xmax: -8365440 ymax: 4860628
## Projected CRS: WGS 84 / Pseudo-Mercator
# Philly block groups
# blockgroups <- st_read("data/Philly_blockgroup.shp") %>%
blockgroups <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp") %>%
st_transform('EPSG:2272')
## Reading layer `Philly_blockgroup' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1338 features and 18 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -8380161 ymin: 4846701 xmax: -8344037 ymax: 4886015
## Projected CRS: WGS 84 / Pseudo-Mercator
# philly bounds
philly_bounds <- st_union(blockgroups)
# philly hydrology (bounded by philly_bounds; source: https://opendataphilly.org/datasets/hydrology/)
hydro <- st_read("https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson") %>%
st_transform(crs = 'EPSG:2272') %>%
st_intersection(philly_bounds)
## Reading layer `OGRGeoJSON' from data source
## `https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson'
## using driver `GeoJSON'
## Simple feature collection with 7970 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -76.18462 ymin: 38.7667 xmax: -74.71871 ymax: 40.66593
## Geodetic CRS: WGS 84
# highway
# stateroads_inphilly <- st_read("data/PaStateRoads2024_03.geojson") %>%
# st_transform('EPSG:2272') %>%
# st_intersection(philly_bounds)
# write out stateroads_inphilly as its own file since it's big
# st_write(stateroads_inphilly,
# "data/PhillyStateRoads.shp")
# read in pre-filte#1a9988 stateroads data
stateroads_inphilly <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp")
## Reading layer `PhillyStateRoads' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2315 features and 105 fields
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: 2660586 ymin: 207551.4 xmax: 2749290 ymax: 304306.6
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
philly_mainhighways <- stateroads_inphilly %>%
filter(TRAF_RT_NO %in% c("I", "US"),
ST_RT_NO %in% c("0001", "0095", "0076", "0676")) %>%
dplyr::select(STREET_NAM, ST_RT_NO, TRAF_RT_NO, TRAF_RT__1)
# save and only use the highways of interest
# for now, interested in comparing highways that cut through neighborhoods vs those that don't
# ACS
# philly property data (https://opendataphilly.org/datasets/philadelphia-properties-and-assessment-history/) <-- that was the original source, but now i'm using the same downloaded file that Luming and Sijia have been working from
philly_properties <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/opa_properties_public.geojson") %>%
st_transform(crs = 'EPSG:2272')
## Reading layer `opa_properties_public' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/opa_properties_public.geojson'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 584002 features and 78 fields (with 171 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.27439 ymin: 39.87513 xmax: -74.95819 ymax: 40.1377
## Geodetic CRS: WGS 84
# philly park data (https://opendataphilly.org/datasets/ppr-properties/)
philly_parks <- st_read("https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson") %>%
st_transform(crs = 'EPSG:2272') %>%
st_intersection(philly_bounds)
## Reading layer `PPR_Properties' from data source
## `https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 507 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -75.28353 ymin: 39.87117 xmax: -74.95865 ymax: 40.13186
## Geodetic CRS: WGS 84
# philly schools
school <-
st_read("https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson") %>%
st_transform('EPSG:2272')
## Reading layer `Schools' from data source
## `https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 495 features and 15 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.2665 ymin: 39.90781 xmax: -74.97057 ymax: 40.12974
## Geodetic CRS: WGS 84
# city facilities (https://opendataphilly.org/datasets/city-facilities-master-facilities-database/)
facilities <- st_read("https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson") %>%
st_transform('EPSG:2272') %>%
filter(STATUS == "A") # remove inactive sites
## Reading layer `City_Facilities_pub' from data source
## `https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 3197 features and 32 fields (with 5 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.28169 ymin: 39.86686 xmax: -74.96455 ymax: 40.13108
## Geodetic CRS: WGS 84
# libraries (from facilities data)
libraries <- facilities %>%
filter(ASSET_GROUP1 == "A8")
# prisons (from facilities data)
prisons <- facilities %>%
filter(ASSET_GROUP2 == "A13.3")
# historic site (from facilities data)
historic <- facilities %>%
filter(ASSET_SUBTYPE1 == "C21")
# museums (from facilities data)
museums <- facilities %>%
filter(ASSET_GROUP1 == "A18",
ASSET_SUBTYPE1 == "C35")
# produce (LPSS, HPSS -- indicators for produce; low or high produce supply stores)
retail_produce <- st_read("https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson") %>%
st_transform('EPSG:2272')
## Reading layer `370eb703-5a4f-4abb-8920-727cef31373b2020329-1-rcimn.5n39o' from data source `https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 1336 features and 16 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -75.28031 ymin: 39.86747 xmax: -74.95575 ymax: 40.13793
## Geodetic CRS: WGS 84
# transit stops
metro <- st_read("https://opendata.arcgis.com/api/v3/datasets/af52d74b872045d0abb4a6bbbb249453_0/downloads/data?format=geojson&spatialRefId=4326") %>%
st_transform('EPSG:2272') %>%
mutate(Type = "metro")
## Reading layer `Highspeed_Stations' from data source
## `https://opendata.arcgis.com/api/v3/datasets/af52d74b872045d0abb4a6bbbb249453_0/downloads/data?format=geojson&spatialRefId=4326'
## using driver `GeoJSON'
## Simple feature collection with 74 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.35358 ymin: 39.90543 xmax: -75.07788 ymax: 40.11349
## Geodetic CRS: WGS 84
city_hall <- metro[metro$Station == 'City Hall', 6]
trolley <- st_read("https://opendata.arcgis.com/api/v3/datasets/dd2afb618d804100867dfe0669383159_0/downloads/data?format=geojson&spatialRefId=4326") %>%
st_transform('EPSG:2272')
## Reading layer `Trolley_Stations' from data source
## `https://opendata.arcgis.com/api/v3/datasets/dd2afb618d804100867dfe0669383159_0/downloads/data?format=geojson&spatialRefId=4326'
## using driver `GeoJSON'
## Simple feature collection with 60 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.394 ymin: 39.90667 xmax: -75.16256 ymax: 39.96216
## Geodetic CRS: WGS 84
# why are some trolley stations missing? going southwest from 40th st station + going northwest from 33rd/34th station
trolley_renamed <- trolley %>%
rename(Station = StopName,
Route = LineAbbr,
Longitude = Lon,
Latitude = Lat) %>%
mutate(Type = "trolley")
# Combine both datasets into one
transit <- bind_rows(metro, trolley_renamed)
# PA hospitals (within 2.5 miles of philly bounds)
hospitals <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/DOH_Hospitals202311.geojson") %>%
st_transform('EPSG:2272') %>%
st_intersection(st_buffer(philly_bounds, 13200))
## Reading layer `DOH_Hospitals202311' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/DOH_Hospitals202311.geojson'
## using driver `GeoJSON'
## Simple feature collection with 226 features and 14 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -80.49621 ymin: 39.75163 xmax: -74.86704 ymax: 42.13403
## Geodetic CRS: WGS 84
# checking out what Luming_property.csv looks like
luming_property <- read.csv("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Luming_property.csv")
# looking at which properties were saved in luming_property -- Luming and Sijia's properties files are limited to the case study area
# luming_properties <- philly_properties %>%
# filter(objectid %in% luming_property$objectid)
# checking out what property_sijia_eda.geojson looks like
sijia_eda <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson")
## Reading layer `property_sijia_eda' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson'
## using driver `GeoJSON'
## Simple feature collection with 2394 features and 98 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 2692333 ymin: 236562 xmax: 2697665 ymax: 240121.5
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
# google API to get restaurant data
# incomplete, will figure this out later
# restaurant_results <- google_places(search_string = paste(search_terms, city, sep = " "),
# key = google_api)
# interstate highways in philly
# ggplot() +
# geom_sf(data = philly_bounds) +
# geom_sf(data = stateroads_inphilly %>%
# filter(TRAF_RT_NO == "I"))
#
# # US highways in philly
# ggplot() +
# geom_sf(data = philly_bounds) +
# geom_sf(data = stateroads_inphilly %>%
# filter(TRAF_RT_NO == "US"))
#
# # PA state highways in philly
# ggplot() +
# geom_sf(data = philly_bounds) +
# geom_sf(data = stateroads_inphilly %>%
# filter(TRAF_RT_NO == "PA"))
#
# # all other roads in philly
# ggplot() +
# geom_sf(data = philly_bounds) +
# geom_sf(data = stateroads_inphilly %>%
# filter(!(TRAF_RT_NO %in% c("I", "US", "PA"))))
# I-95, US-1, I-676, and I-76
philly_highways_plot <-
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#eba075",
"0676" = "#7fe3d6"),
labels = c("US-1", "I-76", "I-95", "I-676")) +
labs(title = "Case Study of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways") +
theme_void()
philly_highways_plot
# ggsave("Output/HighwaysMap.png", plot = philly_highways_plot)
# looking at study area within philly bounds
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = studyarea, fill = "coral")
# philly parks
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent")
# philly schools
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = school, color = "brown4") # point data
# libraries
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = libraries, color = "#eb5600") # point data
# transit / looking at what this category actually consists of....
# looks like there's point geography for parking lots, some random transit stops, and heli pad station
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = facilities %>%
filter(ASSET_GROUP1 == "A17",
grepl("Parking", ASSET_SUBT1_DESC)), color = "pink") # point data
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = sijia_eda, color = "#7fe3d6")
# prison facilities
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = facilities %>%
filter(ASSET_GROUP2 == "A13.3"), color = "lightgrey") # point data
# all philly properties
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_properties, color = "black")
For all Philly properties, calculate the distance from highway and see how it relates to price.
# taking Luming's distance calculator function
calculate_nearest_distance <- function(set_points, other_layer) {
nearest_idx <- st_nearest_feature(set_points, other_layer)
st_distance(set_points, other_layer[nearest_idx, ], by_element = TRUE) %>% as.numeric()
}
The following code chunk took too much to load (too many properties to consider), so I’m going to limit it to the properties within 500m (and possibly look at 1000m and 1500m) from highways to look at the immediate effects of highways on property prices.
# only keep properties that are within 500m (1640.42 ft) of highways of interest
buff500 <- 500 * 3.28084
highways_500buffer <- st_union(st_buffer(philly_mainhighways, buff500))
properties500 <- philly_properties[highways_500buffer,]
# only keep properties that are within 1000m (3280.84 ft) of highways of interest
buff1000 <- 1000 * 3.28084
highways_1000buffer <- st_union(st_buffer(philly_mainhighways, buff1000))
properties1000 <- philly_properties[highways_1000buffer,]
# only keep properties that are within 1500m (4921.26 ft) of highways of interest
buff1500 <- 1500 * 3.28084
highways_1500buffer <- st_union(st_buffer(philly_mainhighways, buff1500))
properties1500 <- philly_properties[highways_1500buffer,]
# visualizing buffer
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#eba075",
"0676" = "#7fe3d6")) +
labs(title = "Case Study of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
# visualizing properties to make sure we got the right ones
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500,
color = "#4a4a4a", size = .3) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#eba075",
"0676" = "#7fe3d6")) +
labs(title = "Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_1000buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties1000,
color = "#4a4a4a", size = .3) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#eba075",
"0676" = "#7fe3d6"),
labels = c("US-1", "I-76", "I-95", "I-676")) +
labs(title = "Properties within 1000m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_1500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties1500,
color = "#4a4a4a", size = .3) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#eba075",
"0676" = "#7fe3d6")) +
labs(title = "Properties within 1500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
Are there differences between how price of property relates to dist_to_closest_highway depending on type of highway (whether highway cuts through neighborhood or not)?
# starting with 500m buffer
# need to check if there are 0 values in sale_price before log transforming
summary(properties500$sale_price)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 10 89900 327917 230000 342000000 840
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 0 10 89900 327917 230000 342000000 840
# only keep properties whose sale_price >= $10,000
# according to #1a9988fin, the most expensive house in philadelphia right now is $25,000,000 -- using that as a cutoff
properties500_clean <- properties500 %>%
filter(sale_price >= 10000,
sale_price <= 25000000,
total_area >= 100,
total_livable_area >= 100,
sale_date >= as.Date("2005-01-01", format = "%Y-%m-%d"),
sale_date <= Sys.Date()) %>%
mutate(price_perTLA = sale_price / total_livable_area,
price_perTA = sale_price / total_area,
log_price = log(sale_price),
log_price_perTLA = log(price_perTLA),
log_price_perTA = log(price_perTA),
norm_log_price = scale(log_price),
norm_log_price_perTLA = scale(log_price_perTLA),
norm_log_price_perTA = scale(log_price_perTA),
category_easy = case_when(category_code_description %in% c("APARTMENTS > 4 UNITS",
"MIXED USE",
"MULTI FAMILY",
"SINGLE FAMILY") ~ "Residential or Mixed",
grepl("INDUS", category_code_description) ~ "Industrial",
grepl("HOTEL", category_code_description) ~ "Hotel",
grepl("OFFICES", category_code_description) ~ "Offices",
category_code_description %in% c("COMMERCIAL",
"GARAGE - COMMERCIAL",
"RETAIL") ~ "Commercial",
grepl("VACANT", category_code_description) ~ "Vacant",
.default = "Other"))
summary(properties500_clean$sale_price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10000 103000 174900 340263 295000 24750000
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 10000 103000 174900 340263 295000 24750000
summary(properties500_clean$price_perTLA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.77 78.86 130.36 181.26 208.33 11527.50
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.77 78.86 130.36 181.26 208.33 11527.5
summary(properties500_clean$price_perTA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.23 58.35 99.21 218.95 220.79 22671.57
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.23 58.35 99.21 218.95 220.79 22671.57
# correlations between total_area and total_livable_area, and sale_price
# cor(properties500_clean$total_area, properties500_clean$total_livable_area, use = "pairwise.complete.obs") # 0.5443428
# cor(properties500_clean$total_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.4210804
# cor(properties500_clean$total_livable_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.577406
# histogram of sale_price
ggplot(data = properties500_clean) +
geom_histogram(aes(x = sale_price),
bins = 100)
ggplot(data = properties500_clean) +
geom_histogram(aes(x = price_perTA),
bins = 100)
ggplot(data = properties500_clean) +
geom_histogram(aes(x = price_perTLA),
bins = 100)
ggplot(data = properties500_clean) +
geom_histogram(aes(x = log_price),
bins = 100)
ggplot(data = properties500_clean) +
geom_histogram(aes(x = log_price_perTA),
bins = 100)
ggplot(data = properties500_clean) +
geom_histogram(aes(x = log_price_perTLA),
bins = 100)
# map of properties within 500 m of highway, colored by sale_price
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
# geom_sf(data = highways_500buffer,
# color = "transparent", fill = "grey", alpha = 0.4) +
# geom_sf(data = properties500_clean,
# aes(color = sale_price), size = .3) +
# geom_sf(data = philly_mainhighways,
# color = "black") +
# labs(title = "Sale Price of Properties within 500m of Highways in Philly",
# subtitle = "US-1, I-76, I-95, and I-676") +
# theme_void()
# map of properties within 500 m of highway, colored by price_perTA
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
# geom_sf(data = highways_500buffer,
# color = "transparent", fill = "grey", alpha = 0.4) +
# geom_sf(data = properties500_clean,
# aes(color = price_perTA), size = .3) +
# geom_sf(data = philly_mainhighways,
# color = "black") +
# labs(title = "Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
# subtitle = "US-1, I-76, I-95, and I-676") +
# theme_void()
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
# geom_sf(data = highways_500buffer,
# color = "transparent", fill = "grey", alpha = 0.4) +
# geom_sf(data = properties500_clean,
# aes(color = price_perTLA), size = .3) +
# geom_sf(data = philly_mainhighways,
# color = "black") +
# labs(title = "Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
# subtitle = "US-1, I-76, I-95, and I-676") +
# theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "black", alpha = 0.2) +
geom_sf(data = properties500_clean,
aes(color = log_price), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Log Price") +
theme_void()
# house sale price (category_easy == "Residential or Mixed")
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "black", alpha = 0.2) +
geom_sf(data = properties500_clean %>%
filter(grepl("Resident", category_easy)),
aes(color = log_price), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price of Properties within 500m of Highways in Philly",
subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
color = "Log Price") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = log_price_perTA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Log Price (per TA)") +
theme_void()
# same thing but residential or mixed use only
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean %>%
filter(grepl("Resident", category_easy)),
aes(color = log_price_perTA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
color = "Log Price (per TA)") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = log_price_perTLA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Log Price (per TLA)") +
theme_void()
# same thing but residential/mixed use only
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean %>%
filter(grepl("Resident", category_easy)),
aes(color = log_price_perTLA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
color = "Log Price (per TLA)") +
theme_void()
Calculate each property’s distance to highways.
# calculate each property's distance to highways of interest
# distance is in feet
# start_time <- Sys.time()
prop500 <- properties500_clean %>%
select(matches("assessment_date|building_code|category_|census_tract|geographic|zip|market_value|parcel|sale_|taxable|_area|year_built|objectid|geometry|price")) %>%
mutate(dist_to_US001 = calculate_nearest_distance(geometry,
philly_mainhighways %>%
filter(ST_RT_NO == "0001")),
dist_to_I076 = calculate_nearest_distance(geometry,
philly_mainhighways %>%
filter(ST_RT_NO == "0076")),
dist_to_I095 = calculate_nearest_distance(geometry,
philly_mainhighways %>%
filter(ST_RT_NO == "0095")),
dist_to_I676 = calculate_nearest_distance(geometry,
philly_mainhighways %>%
filter(ST_RT_NO == "0676")),
dist_to_US001m = dist_to_US001 * 0.3048,
dist_to_I076m = dist_to_I076 * 0.3048,
dist_to_I095m = dist_to_I095 * 0.3048,
dist_to_I676m = dist_to_I676 * 0.3048) # Time difference of 55.20697 secs
# find closest highway
prop500_closest <- prop500 %>%
select(matches("objectid|dist_to")) %>%
pivot_longer(cols = 2:5,
names_to = "highway",
values_to = "distance") %>%
group_by(objectid) %>%
summarise(dist_to_closest_highway = min(distance, na.rm = T)) %>%
ungroup() %>%
mutate(dist_to_closest_highwaym = dist_to_closest_highway * 0.3048)
# join closest highway
prop500 <- left_join(prop500,
prop500_closest %>% st_drop_geometry(),
by = "objectid") %>%
mutate(closest_highway = case_when(
dist_to_closest_highway == dist_to_US001 ~ "US-1",
dist_to_closest_highway == dist_to_I076 ~ "I-76",
dist_to_closest_highway == dist_to_I095 ~ "I-95",
dist_to_closest_highway == dist_to_I676 ~ "I-676",
.default = NA),
closest_highway = factor(closest_highway, levels = c("US-1", "I-76", "I-676", "I-95")),
highway_type = case_when(closest_highway %in% c("US-1","I-676") ~ "through neighborhood",
closest_highway %in% c("I-95","I-76") ~ "along waterway",
.default = NA))
# save file
# st_write(prop500, "/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_prop500_dist2highway.geojson")
# read in pre-saved file
# prop500_new <- st_read("/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/data/philly_prop500_dist2highway.geojson")
First, let’s looks at a faceted plot of the distribution of sales price for each highway separately
# prop500_faceted <-
# ggplot(data = prop500,
# aes(x = dist_to_closest_highwaym, y = log_price_perTA)) +
# geom_point(color = "#1a9988", alpha = .1, size = 0.3) +
# geom_smooth(color = "#eb5600", method = "lm", se = F, linewidth = 1.2) +
# # geom_point(alpha = .1, size = 0.3) +
# # geom_smooth(method = "lm", se = F, linewidth = 1.2) +
# facet_wrap(~closest_highway) +
# labs(title = "Distribution of Property Sale Price",
# x = "Distance to Closest Highway (m)",
# y = "Log Sale Price (per TA)") +
# theme_minimal()
prop500_faceted <-
ggplot(data = prop500,
aes(x = log_price_perTA, color = closest_highway, fill = closest_highway)) +
geom_histogram(bins = 50) +
facet_wrap(~closest_highway, scales = "free_y") +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Distribution of Property Sale Price",
x = "Log Sale Price (per Total Area)",
y = "Count",
color = "Closest Highway",
fill = "Closest Highway") +
theme_minimal()
prop500_faceted
# save image for slides
# ggsave("Output/SalePriceDistribution.png", plot = prop500_faceted)
# residential + mixed only
# prop500_faceted_res <-
# ggplot(data = prop500 %>%
# filter(grepl("Reside", category_easy)),
# aes(x = dist_to_closest_highwaym, y = log_price_perTA)) +
# geom_point(color = "#1a9988", alpha = .1, size = 0.3) +
# geom_smooth(color = "#eb5600", method = "lm", se = F, linewidth = 1.2) +
# # geom_point(alpha = .1, size = 0.3) +
# # geom_smooth(method = "lm", se = F, linewidth = 1.2) +
# # facet_wrap(closest_highway) +
# labs(title = "Distribution of Property Sale Price",
# x = "Distance to Closest Highway (m)",
# y = "Log Sale Price (per TA)") +
# theme_minimal()
prop500_faceted_res <-
ggplot(data = prop500 %>%
filter(grepl("Resid", category_easy)),
aes(x = log_price_perTA, color = closest_highway, fill = closest_highway)) +
geom_histogram(bins = 50) +
facet_wrap(~closest_highway, scales = "free_y") +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Distribution of Property Sale Price",
subtitle = "Residential and Mixed Properties only",
x = "Log Sale Price (per Total Area)",
y = "Count",
color = "Closest Highway",
fill = "Closest Highway") +
theme_minimal()
prop500_faceted_res
# save image for slides
# ggsave("Output/SalePriceDistribution_residential.png", plot = prop500_faceted_res)
Now, look at relationship between distance to highway and price.
# map of properties within 500m of highway cored by which highway they're closest to
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = prop500,
aes(color = closest_highway), size = 0.3) +
geom_sf(data = philly_mainhighways,
color = "black") +
# add custom colors
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Closest Highway") +
theme_void()
# simple scatter of price and dist2closesthighway
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = sale_price,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm") +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Property Price as a function of Distance to Closest Highway",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway",
x = "Distance to Closest Highway (m)",
y = "Property Sale Price ($)") +
theme_minimal()
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
scatter_4highways <-
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = log_price_perTA,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway",
x = "Distance to Closest Highway (m)",
y = "Property Sale Price (log price per TA)") +
theme_minimal()
scatter_4highways
# save image for slides
# ggsave("Output/PriceVSDist2Highway_4highways.png",
# plot = scatter_4highways,
# height = 7,
# width = 10)
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
# sale_price <= 1e7
ggplot(data = prop500 %>%
filter(sale_price <= 1e7),
aes(x = dist_to_closest_highwaym, y = log_price_perTA,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway",
x = "Distance to Closest Highway (m)",
y = "Property Sale Price (log price per TA)") +
theme_minimal()
# simple scatter of price (as log price per total area in sq ft) and log(dist2closesthighway)
ggplot(data = prop500,
aes(x = log(dist_to_closest_highwaym), y = log_price_perTA,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway (log)",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway",
x = "Log Distance to Closest Highway (m)",
y = "Log Property Sale Price (per TA)") +
theme_minimal()
# simple regression of price (log_price_perTA) and dist2closesthighway
fit1 <- lm(log_price_perTA ~ dist_to_closest_highwaym, data = prop500)
summary(fit1)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym, data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1688 -0.6772 -0.1453 0.6566 5.2436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.804e+00 1.298e-02 370.09 < 2e-16 ***
## dist_to_closest_highwaym -2.111e-04 4.172e-05 -5.06 4.21e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.069 on 37293 degrees of freedom
## Multiple R-squared: 0.0006861, Adjusted R-squared: 0.0006593
## F-statistic: 25.61 on 1 and 37293 DF, p-value: 4.209e-07
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types)
fit2 <- lm(log_price_perTA ~ dist_to_closest_highwaym + closest_highway, data = prop500)
summary(fit2)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + closest_highway,
## data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.6571 -0.5245 0.0563 0.5566 4.6625
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.312e+00 1.222e-02 352.831 < 2e-16 ***
## dist_to_closest_highwaym -2.741e-04 3.662e-05 -7.483 7.41e-14 ***
## closest_highwayI-76 9.473e-01 1.885e-02 50.255 < 2e-16 ***
## closest_highwayI-676 1.848e+00 3.899e-02 47.390 < 2e-16 ***
## closest_highwayI-95 1.011e+00 1.019e-02 99.169 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9317 on 37290 degrees of freedom
## Multiple R-squared: 0.2412, Adjusted R-squared: 0.2411
## F-statistic: 2964 on 4 and 37290 DF, p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types) with interaction
fit3 <- lm(log_price_perTA ~ dist_to_closest_highwaym + closest_highway + dist_to_closest_highwaym*closest_highway, data = prop500)
summary(fit3)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + closest_highway +
## dist_to_closest_highwaym * closest_highway, data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4704 -0.5124 0.0547 0.5539 4.6450
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 4.186e+00 1.573e-02 266.166
## dist_to_closest_highwaym 1.813e-04 5.120e-05 3.542
## closest_highwayI-76 3.368e-01 5.506e-02 6.116
## closest_highwayI-676 1.986e+00 8.292e-02 23.950
## closest_highwayI-95 1.374e+00 2.338e-02 58.773
## dist_to_closest_highwaym:closest_highwayI-76 1.752e-03 1.578e-04 11.100
## dist_to_closest_highwaym:closest_highwayI-676 -5.065e-04 3.115e-04 -1.626
## dist_to_closest_highwaym:closest_highwayI-95 -1.308e-03 7.586e-05 -17.243
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## dist_to_closest_highwaym 0.000398 ***
## closest_highwayI-76 9.69e-10 ***
## closest_highwayI-676 < 2e-16 ***
## closest_highwayI-95 < 2e-16 ***
## dist_to_closest_highwaym:closest_highwayI-76 < 2e-16 ***
## dist_to_closest_highwaym:closest_highwayI-676 0.103936
## dist_to_closest_highwaym:closest_highwayI-95 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9252 on 37287 degrees of freedom
## Multiple R-squared: 0.2519, Adjusted R-squared: 0.2517
## F-statistic: 1793 on 7 and 37287 DF, p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types)
fit4 <- lm(log_price_perTA ~ dist_to_closest_highwaym + highway_type, data = prop500)
summary(fit4)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + highway_type,
## data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.6296 -0.5491 0.0288 0.5530 5.6683
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.337e+00 1.293e-02 412.760 <2e-16 ***
## dist_to_closest_highwaym -3.567e-04 3.747e-05 -9.521 <2e-16 ***
## highway_typethrough neighborhood -9.454e-01 9.953e-03 -94.977 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9595 on 37292 degrees of freedom
## Multiple R-squared: 0.1953, Adjusted R-squared: 0.1953
## F-statistic: 4526 on 2 and 37292 DF, p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types) with interaction
fit5 <- lm(log_price_perTA ~ dist_to_closest_highwaym + highway_type + dist_to_closest_highwaym*highway_type, data = prop500)
sum5 <- summary(fit5)
data.frame(sum5$coefficients) %>%
mutate(Estimate = round(Estimate, 3),
StdErorr = round(Std..Error, 3),
PValue = round(Pr...t.., 3)) %>%
select(Estimate, StdErorr, PValue) %>%
kbl(align = c("l", rep("c", 3)),
caption = "Linear Regression: Sale Price ~ Distance to Closest Highway + Highway Type + Interaction") %>%
kable_styling() %>%
kable_classic(full_width = F, html_font = "Cambria")
| Estimate | StdErorr | PValue | |
|---|---|---|---|
| (Intercept) | 5.454 | 0.017 | 0 |
| dist_to_closest_highwaym | -0.001 | 0.000 | 0 |
| highway_typethrough neighborhood | -1.169 | 0.023 | 0 |
| dist_to_closest_highwaym:highway_typethrough neighborhood | 0.001 | 0.000 | 0 |
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types) with interaction
fit6 <- lm(log_price_perTA ~ log(dist_to_closest_highwaym) + highway_type + log(dist_to_closest_highwaym)*highway_type, data = prop500)
summary(fit6)
##
## Call:
## lm(formula = log_price_perTA ~ log(dist_to_closest_highwaym) +
## highway_type + log(dist_to_closest_highwaym) * highway_type,
## data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.5984 -0.5404 0.0279 0.5509 5.7509
##
## Coefficients:
## Estimate
## (Intercept) 6.05426
## log(dist_to_closest_highwaym) -0.14914
## highway_typethrough neighborhood -1.84302
## log(dist_to_closest_highwaym):highway_typethrough neighborhood 0.16410
## Std. Error
## (Intercept) 0.06130
## log(dist_to_closest_highwaym) 0.01108
## highway_typethrough neighborhood 0.08546
## log(dist_to_closest_highwaym):highway_typethrough neighborhood 0.01549
## t value Pr(>|t|)
## (Intercept) 98.76 <2e-16
## log(dist_to_closest_highwaym) -13.47 <2e-16
## highway_typethrough neighborhood -21.57 <2e-16
## log(dist_to_closest_highwaym):highway_typethrough neighborhood 10.59 <2e-16
##
## (Intercept) ***
## log(dist_to_closest_highwaym) ***
## highway_typethrough neighborhood ***
## log(dist_to_closest_highwaym):highway_typethrough neighborhood ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9583 on 37291 degrees of freedom
## Multiple R-squared: 0.1973, Adjusted R-squared: 0.1973
## F-statistic: 3056 on 3 and 37291 DF, p-value: < 2.2e-16
Visualizing the highway types (cutting through neighborhoods vs running alongside water).
# simple scatter of price and dist2closesthighway
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = sale_price,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm") +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price as a function of Distance to Highway Type",
subtitle = "Colored by Highway Type; Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type") +
theme_minimal()
# simple scatter of price and dist2closesthighway with loess method
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = sale_price,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "loess", se = FALSE, size = 1.2) +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price as a function of Distance to Highway Type",
subtitle = "Colored by Highway Type; Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type") +
theme_minimal()
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
scatter_2types <-
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = log_price_perTA,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Highway Type",
subtitle = "Colored by Highway Type; Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type",
x = "Distance to Closest Highway (m)",
y = "Log Sale Price (per TA)") +
theme_minimal()
scatter_2types
# save for slides
# ggsave("Output/PriceVSDist2Highway_2types.png",
# plot = scatter_2types,
# height = 7,
# width = 10)
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
ggplot(data = prop500,
aes(x = log(dist_to_closest_highwaym), y = log_price_perTA,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Highway Type",
subtitle = "Colored by Highway Type; Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type",
x = "Distance to Closest Highway (m)",
y = "Log Sale Price (per TA)") +
theme_minimal()
# correlations
prop500 %>%
st_drop_geometry() %>%
group_by(highway_type) %>%
summarize(correlation = cor(dist_to_closest_highwaym, log_price_perTA, method = "pearson"), .groups = "drop") %>%
mutate(correlation = round(correlation, 3)) %>%
kbl(col.names = c("Highway Type", "Correlation"),
align = c("l", "c"),
caption = "Correlation Between Property Sale Price and Distance to Closest Highway by Highway Types") %>%
kable_styling() %>%
kable_classic(full_width = F, html_font = "Cambria")
| Highway Type | Correlation |
|---|---|
| along waterway | -0.091 |
| through neighborhood | 0.005 |
# correlation of price and dist to highway for each highway
prop500 %>%
st_drop_geometry() %>%
group_by(closest_highway) %>%
summarize(correlation = cor(dist_to_closest_highwaym, log_price_perTA, method = "pearson"), .groups = "drop") %>%
mutate(correlation = round(correlation, 3)) %>%
kbl(col.names = c("Highway", "Correlation"),
align = c("l", "c"),
caption = "Correlation Between Property Sale Price and Distance to Closest Highway by Highway") %>%
kable_styling() %>%
kable_classic(html_font = "Cambria")
| Highway | Correlation |
|---|---|
| US-1 | 0.034 |
| I-76 | 0.181 |
| I-676 | -0.042 |
| I-95 | -0.139 |
prop500 %>%
st_drop_geometry() %>%
group_by(closest_highway) %>%
summarize(correlation = cor(log(dist_to_closest_highwaym), log_price_perTA, method = "pearson"), .groups = "drop")
## # A tibble: 4 × 2
## closest_highway correlation
## <fct> <dbl>
## 1 US-1 0.0363
## 2 I-76 0.211
## 3 I-676 0.00866
## 4 I-95 -0.133
Limiting the properties further to those under $1mil
prop500_under1m <- prop500 %>%
filter(sale_price < 1e6)
ggplot(data = prop500_under1m,
aes(x = dist_to_closest_highwaym, y = sale_price,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .1, size = 0.3) +
geom_smooth(method = "loess", se = FALSE, size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Property Price (under $1M) as a function of Distance to Closest Highway",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway") +
theme_minimal()
# correlation of price and dist to highway for each highway
cor <- prop500_under1m %>%
st_drop_geometry() %>%
group_by(closest_highway) %>%
summarize(correlation = cor(dist_to_closest_highway, log_price_perTA, method = "pearson"), .groups = "drop")
cor
## # A tibble: 4 × 2
## closest_highway correlation
## <fct> <dbl>
## 1 US-1 0.0494
## 2 I-76 0.157
## 3 I-676 0.0490
## 4 I-95 -0.146
# correlation of price and dist to closest highway
prop500_under1m %>%
st_drop_geometry() %>%
summarize(correlation = cor(dist_to_closest_highway, log_price_perTA, method = "pearson"), .groups = "drop")
## correlation
## 1 -0.02534909
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
ggplot(data = prop500_under1m,
aes(x = dist_to_closest_highwaym, y = log_price_perTA,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .1, size = 0.3) +
geom_smooth(method = "lm", se=F, size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Property Price (log price per TA; under $1M) as a function of Distance to Closest Highway",
subtitle = "Colored by Closest Highway; Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway") +
theme_minimal()
First, I filtered the property data in the following ways.
Keep properties whose sale_date is between now and Jan. 1st, 2005
Remove properties whose sale price is < $10,000 and > $25M
Remove properties whose total area and total livable area are under 100 sq. ft.
Then, I calculated each property’s distance to the four highways of interest as well as amenities. The amenities that we’ve looked into so far are parks, libraries, transit stops, schools, City Hall (proxy for the central business district), hospitals, museums, and historic sites. We also included distance to prisons as a disamenity.
We still need to look at sale price’s relationship with restaurants and grocery stores (I just need to figure out how to use the google API to do this).
# takes about 7 min to run
# cleaned_properties <- philly_properties %>%
# filter(sale_price >= 10000,
# sale_price <= 25000000,
# total_area >= 100,
# total_livable_area >= 100,
# sale_date >= as.Date("2005-01-01", format = "%Y-%m-%d"),
# sale_date <= Sys.Date()) %>%
# mutate(price_perTLA = sale_price / total_livable_area,
# price_perTA = sale_price / total_area,
# log_price = log(sale_price),
# log_price_perTLA = log(price_perTLA),
# log_price_perTA = log(price_perTA),
# norm_log_price = scale(log_price),
# norm_log_price_perTLA = scale(log_price_perTLA),
# norm_log_price_perTA = scale(log_price_perTA),
# category_easy = case_when(category_code_description %in% c("APARTMENTS > 4 UNITS",
# "MIXED USE",
# "MULTI FAMILY",
# "SINGLE FAMILY") ~ "Residential or Mixed",
# grepl("INDUS", category_code_description) ~ "Industrial",
# grepl("HOTEL", category_code_description) ~ "Hotel",
# grepl("OFFICES", category_code_description) ~ "Offices",
# category_code_description %in% c("COMMERCIAL",
# "GARAGE - COMMERCIAL",
# "RETAIL") ~ "Commercial",
# grepl("VACANT", category_code_description) ~ "Vacant",
# .default = "Other"),
# dist_to_US001 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0001")),
# dist_to_I076 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0076")),
# dist_to_I095 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0095")),
# dist_to_I676 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0676")),
# dist_park = calculate_nearest_distance(geometry,
# philly_parks),
# dist_lib = calculate_nearest_distance(geometry,
# libraries),
# dist_transit = calculate_nearest_distance(geometry,
# transit),
# dist_school = calculate_nearest_distance(geometry,
# school),
# dist_cityhall = calculate_nearest_distance(geometry,
# city_hall),
# dist_hospital = calculate_nearest_distance(geometry,
# hospitals),
# dist_museum = calculate_nearest_distance(geometry,
# museums),
# dist_prison = calculate_nearest_distance(geometry,
# prisons),
# dist_historic = calculate_nearest_distance(geometry,
# historic),
# dist_to_US001m = dist_to_US001 * 0.3048,
# dist_to_I076m = dist_to_I076 * 0.3048,
# dist_to_I095m = dist_to_I095 * 0.3048,
# dist_to_I676m = dist_to_I676 * 0.3048,
# dist_parkm = dist_park * 0.3048,
# dist_libm = dist_lib * 0.3048,
# dist_transitm = dist_transit * 0.3048,
# dist_schoolm = dist_school * 0.3048,
# dist_cityhallm = dist_cityhall * 0.3048,
# dist_hospitalm = dist_hospital * 0.3048,
# dist_museumm = dist_museum * 0.3048,
# dist_prisonm = dist_prison * 0.3048,
# dist_historicm = dist_historic * 0.3048)
# find closest highway
# clean_closest <- cleaned_properties %>%
# select(matches("objectid|dist_to_US|dist_to_I")) %>%
# pivot_longer(cols = 2:5,
# names_to = "highway",
# values_to = "distance") %>%
# group_by(objectid) %>%
# summarise(dist_to_closest_highway = min(distance, na.rm = T)) %>%
# ungroup() %>%
# mutate(dist_to_closest_highwaym = dist_to_closest_highway * 0.3048)
# join closest highway
# cleaned_props <- left_join(cleaned_properties,
# clean_closest %>% st_drop_geometry(),
# by = "objectid") %>%
# mutate(closest_highway = case_when(
# dist_to_closest_highway == dist_to_US001 ~ "US-1",
# dist_to_closest_highway == dist_to_I076 ~ "I-76",
# dist_to_closest_highway == dist_to_I095 ~ "I-95",
# dist_to_closest_highway == dist_to_I676 ~ "I-676",
# .default = NA),
# closest_highway = factor(closest_highway, levels = c("US-1", "I-76", "I-676", "I-95")),
# highway_type = case_when(closest_highway %in% c("US-1","I-676") ~ "through neighborhood",
# closest_highway %in% c("I-95","I-76") ~ "along waterway",
# .default = NA)) %>%
# filter(!is.na(closest_highway))
# save for easier access later
# st_write(cleaned_props,
# "/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_cleaned_properties_with_amenities.geojson")
# read in pre-saved data
cleaned_props <- st_read("/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_cleaned_properties_with_amenities.geojson")
## Reading layer `philly_cleaned_properties_with_amenities' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_cleaned_properties_with_amenities.geojson'
## using driver `GeoJSON'
## Simple feature collection with 244003 features and 117 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 2662223 ymin: 210016.8 xmax: 2749428 ymax: 304861
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
# plot of log_price_perTA
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = cleaned_props, aes(color = log_price_perTA), size = 0.2) +
scale_color_gradient(high = "#eb5600",
low = "#f7cfb7") +
labs(title = "Property Price (log sale price per total area)",
color = "Log Price") +
theme_void()
I’m going to go through the following columns of interest (amenity predictors) and make a map + scatterplot and get the Pearson correlation coefficient between the amenity/disamenity + sale price (log_price + log_price_perTA).
dist_to_closest_highwaym
dist_parkm
dist_libm
dist_transitm
dist_schoolm
dist_cityhallm
dist_hospitalm
dist_museumm
dist_prisonm
dist_historicm
# only keeping two outcomes (log_price + log_price_perTA) and all amenities
vars_of_int <- cleaned_props %>%
select(log_price, log_price_perTA,
dist_to_closest_highwaym, dist_parkm, dist_libm, dist_transitm, dist_schoolm, dist_cityhallm, dist_hospitalm, dist_museumm, dist_prisonm, dist_historicm)
# cor matrix for corplot
cors_logprice <- cor(vars_of_int %>%
select(-log_price_perTA) %>%
rename(dist_2clssthghwy = dist_to_closest_highwaym) %>%
st_drop_geometry())
cors_logpriceperTA <- cor(vars_of_int %>%
select(-log_price) %>%
rename(dist_2clssthghwy = dist_to_closest_highwaym) %>%
st_drop_geometry())
# significance test
sigtest_logprice <- cor.mtest(vars_of_int %>%
select(-log_price_perTA) %>%
rename(dist_2clssthghwy = dist_to_closest_highwaym) %>%
st_drop_geometry())
sigtest_logpriceperTA <- cor.mtest(vars_of_int %>%
select(-log_price) %>%
rename(dist_2clssthghwy = dist_to_closest_highwaym) %>%
st_drop_geometry())
# pretty correlation matrix of all continuous variables
corrplot(cors_logprice,
title = "Correlation Matrix for Log Price",
p.mat = sigtest_logprice$p,
method = 'circle',
type = 'lower',
insig='blank',
addCoef.col ='black',
number.cex = 0.8,
diag=FALSE)
corrplot(cors_logpriceperTA,
title = "Correlation Matrix for Log Price",
p.mat = sigtest_logpriceperTA$p,
method = 'circle',
type = 'lower',
insig='blank',
addCoef.col ='black',
number.cex = 0.8,
diag=FALSE)
From the correlation matrix, we can see that distance to City Hall, distance to Transit, and distance to museums are all highly collinear with correlation coefficients ranging from .78 (transit and museum) to .98 (museum and city hall). Interestingly, distance to city hall and distance to museum are the variables with the correlation coefficient of highest magnitude with log_price_perTA (at -.38 and -.34 respectively), but distance to transit is not as correlated to the outcome variable (-.18).
When considering all properties in Philadelphia (after cleaning this data a bit), we see that distance to closest highway now has a negative relationship with property sale price (-0.18).
I’m moving forward with log_price_perTA as outcome for now because the size of the property explains a lot of the variance in property sale price already and right now we are focused on external predictors.
dat <- vars_of_int %>%
select(log_price_perTA, dist_parkm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Park"
cor_toplot <- round(cor(dat$dist_parkm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_parkm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 2000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Park (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_parkm)) +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent", alpha = 0.75) +
labs(title = "Distance to nearest park",
color = "Dist. to park") +
theme_void()
Looking at distance to park without the extreme outlier that is almost
2500m from the nearest park
dat <- vars_of_int %>%
select(log_price_perTA, dist_parkm) %>%
filter(dist_parkm < 2000)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Park"
cor_toplot <- round(cor(dat$dist_parkm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_parkm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 1500,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
subtitle = "Removed outlier",
x = "Distance to nearest Park (m)",
y = "Log Price (per total area)") +
theme_minimal()
Removing the outlier had no effect on the correlation.
dat <- vars_of_int %>%
select(log_price_perTA, dist_libm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Library"
cor_toplot <- round(cor(dat$dist_libm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_libm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 3000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Library (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_libm)) +
geom_sf(data = libraries, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Library",
color = "Dist. to Lib.") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_transitm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Transit Stop"
cor_toplot <- round(cor(dat$dist_transitm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_transitm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 10000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Transit Stop (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_transitm)) +
geom_sf(data = transit, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Transit stop",
color = "Dist. to station") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_schoolm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest School"
cor_toplot <- round(cor(dat$dist_schoolm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_schoolm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 2000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest School (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_schoolm)) +
geom_sf(data = school, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest School",
color = "Dist. to School") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_cityhallm)
scat_title <- "Log Price (per total area) as a function of Distance to City Hall"
cor_toplot <- round(cor(dat$dist_cityhallm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_cityhallm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 20000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to City Hall (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_cityhallm)) +
geom_sf(data = city_hall, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to City Hall",
color = "Dist. to CH") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_hospitalm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Hospital"
cor_toplot <- round(cor(dat$dist_hospitalm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_hospitalm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 5000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Hospital (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_hospitalm)) +
geom_sf(data = hospitals, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Hospital",
color = "Dist. to Hospital") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_museumm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Museum"
cor_toplot <- round(cor(dat$dist_museumm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_museumm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 15000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Museum (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_museumm)) +
geom_sf(data = museums, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Museum",
color = "Dist. to Museum") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_prisonm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Prison"
cor_toplot <- round(cor(dat$dist_prisonm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_prisonm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 9000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Prison (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_prisonm)) +
geom_sf(data = prisons, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Prison",
color = "Dist. to Prison") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_historicm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Historic Site"
cor_toplot <- round(cor(dat$dist_historicm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_historicm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 6000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Historic Site (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_historicm)) +
geom_sf(data = historic, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to Historic Site",
color = "Dist. to Hist.") +
theme_void()